Load Packages
First I need to load up the packages I’ll need
library(sf)
Linking to GEOS 3.4.2, GDAL 2.1.2, proj.4 4.9.1
library(ggplot2) #development version!
## devtools::install_github("tidyverse/ggplot2")
library(tidyverse)
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ------------------------------------------------------
filter(): dplyr, stats
lag(): dplyr, stats
library(readr)
## Not sure about this bit
#library("tidyverse",lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")
Import Data
Now I import my data. I filter for the Arran postcodes, (since Arran all begins ‘KA27’).
## Finding the Arran coordinates
library(dplyr)
allcoordinates <- read.csv("alldata/ukpostcodes.csv")
arrancoordinates <- filter(allcoordinates,substr(postcode,1,4)=="KA27")
Plot Coordinates
Now I plot these coordinates.
## Plotting the Arran coordinates
ggplot(data = arrancoordinates) +
geom_point(mapping = aes(x = longitude, y = latitude)) +
ggtitle("Arran Postcodes") +
labs(title = "Arran Postcodes", x = "Longitude", y = "Latitude") +
theme(plot.title = element_text(hjust = 0.5))

Now I create some plots. #Arran Borders
pcs <- read_sf("alldata/Scotland_pcs_2011")
#Print Post codes lists
arransubsect <- filter(pcs,substr(label,1,4)=="KA27")
arransubsect %>%
ggplot() +
geom_sf() +
theme(axis.text.x=element_text(angle=45, hjust = 1))

Shape files
Then I can load the shape files.
#Import SIMD data from http://www.gov.scot/Topics/Statistics/SIMD
#The "new data zone boundaries with SIMD16 ranks (zipped shapefile)"
#'2011 Data Zone boundaries'
DZBoundaries2016 <- read_sf("./alldata/SG_SIMD_2016")
#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012
#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012/resource/d6fa8924-83da-4e80-a560-4ef0477f230b
DZBoundaries2012 <- read_sf("./alldata/SG_SIMD_2012")
DZBoundaries2009 <- read_sf("./alldata/SG_SIMD_2009")
DZBoundaries2006 <- read_sf("./alldata/SG_SIMD_2006")
DZBoundaries2004 <- read_sf("./alldata/SG_SIMD_2004")
Load SIMD data
Then (having already downloaded it), I can load the SIMD data.
#Look at data from 2016
SIMD2016 <-read.csv("./alldata/00505244.csv")
SIMD20162 <-read_sf("./alldata/SG_SIMD_2016")
#Look at data from 2012
SIMD2012 <- readxl::read_excel("./alldata/SIMD2012/00410770.xls")
SIMD20122 <- readxl::read_excel("./alldata/SIMD2012/00416552.xls")
#Look at data from 2009
SIMD2009 <- readxl::read_excel("./alldata/SIMD2009/0096578.xls")
SIMD20092 <- readxl::read_excel("./alldata/SIMD2009/0097806.xls")
#Look at data from 2006
# 2009 data - SIMD2006 <- readxl::read_excel("./alldata/SIMD2006/0096578.xls")
SIMD20062 <- readxl::read_excel("./alldata/SIMD2006/0097880.xls")
#Look at data from 2004
SIMD2004 <- readxl::read_excel("./alldata/SIMD2004/0027003.xls")
Select Arran SIMD data
I have to choose the right rows manually in order to select the Arran data.
#Selecting ArranDZ2016
Arrandz <- c(4672,4666,4669,4671,4667,4668,4670)
#Health domain rank
#2016
arran2016 <- SIMD20162[Arrandz,]
#Find postcode look-up, KA27 postcodes. Find unique DZ. Find row positions.
#Selecting ArranDZ2012
Arrandz2012 <- c(4409,4372,4353,4352,4351,4350,4349)
#2012
arran2012 <- DZBoundaries2012[Arrandz2012,]
#2009
arran2009 <- DZBoundaries2009[Arrandz2012,]
#2006
arran2006 <- DZBoundaries2006[Arrandz2012,]
#2004
arran2004 <- DZBoundaries2004[Arrandz2012,]
The reason I’ve downloaded all the datazones shapefiles individually (three steps up), is because they change between 2016 and 2012.
arran20162 <- arran2016 %>%
select(DataZone, geometry, Percentile) %>%
mutate(year="2016")
arran20122 <- arran2012 %>%
select(DataZone, geometry, Percentile) %>%
mutate(year="2012")
arran1612 <- rbind(arran20162,arran20122)
See the small difference.
arran1612 %>%
ggplot() +
geom_sf(aes(fill = DataZone)) +
facet_wrap('year') +
theme(legend.position="none") +
theme(axis.text.x=element_text(angle=45, hjust = 1))

Now I want to plot all the data, first I combine it all into one table. First I subselect the data I want from the appropriate columns.
arran20092 <- arran2009 %>%
select(DataZone, geometry, Percentile) %>%
mutate(year="2009")
arran20062 <- arran2006 %>%
select(DataZone, geometry, Percentile) %>%
mutate(year="2006")
arran20042 <- arran2004 %>%
select(DataZone, geometry, Percentile) %>%
mutate(year="2004")
arransimd <- rbind(arran20162,arran20122,arran20092,arran20062,arran20042)
Then I plot the data zones to look at all of them, just to double-check nothing else changed. (It looks like the only change was between 2012 and 2016).
arransimd %>%
ggplot() +
geom_sf(aes(fill = DataZone)) +
facet_wrap('year') +
theme(legend.position="none") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(), axis.text.y=element_blank(),
axis.ticks.y=element_blank())

Arran Percentile Plots
Now I plot the percentiles.
arransimd %>%
ggplot() +
geom_sf(aes(fill = Percentile)) +
facet_wrap('year') +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(), axis.text.y=element_blank(),
axis.ticks.y=element_blank())

There we are. Not the SIMD health percentiles of Arran zones throughout SIMD history. And I’ve learned a little bit about graphics in R.
If I wanted to I could show the zones individually..
First I find the unique zones. (There are 14. 7 Zones 2016, 7 Zones pre-2016)
datazones <- unique(arransimd$DataZone)
I’ll have to find out a simpler way to do this but..
Pre-2016 Individual Zones
S01004409 <- filter(arransimd, DataZone=="S01004409")
S01004372 <- filter(arransimd, DataZone=="S01004372")
S01004353 <- filter(arransimd, DataZone=="S01004353")
S01004352 <- filter(arransimd, DataZone=="S01004352")
S01004351 <- filter(arransimd, DataZone=="S01004351")
S01004350 <- filter(arransimd, DataZone=="S01004350")
S01004349 <- filter(arransimd, DataZone=="S01004349")
function1 <- function(argument)
{
argument %>%
ggplot() +
geom_sf(aes(fill = Percentile)) +
facet_wrap('year') +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
}
function1(S01004409)

function1(S01004372)

function1(S01004353)

function1(S01004352)

function1(S01004351)

function1(S01004350)

function1(S01004349)

This is all well and good. But I found it difficult to remember which zone went where. So I’ve plotted a reference image to go beside the charts. #Pre-2016 Reference Images
function2 <- function(argument)
{
arransubsect %>%
ggplot() +
geom_sf() +
theme(axis.text.x=element_text(angle=45, hjust = 1)) +
geom_sf(data= argument, aes(fill = DataZone))
}
arransubsect <- filter(pcs,substr(label,1,4)=="KA27")
function2(S01004409)

function2(S01004372)

function2(S01004353)

function2(S01004352)

function2(S01004351)

function2(S01004350)

function2(S01004349)

But thinking about it, I could plot all the years together like this. #Pre-2016 Individual Zones shown on whole island
function3 <- function(argument)
{
argument %>%
ggplot() +
geom_sf(data = arransubsect) +
geom_sf(aes(fill = Percentile)) +
facet_wrap('year') +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
}
function3(S01004409)

function3(S01004372)

function3(S01004353)

function3(S01004352)

function3(S01004351)
function3(S01004350)
2016
Now I apply the same functions to the post-2016 Zones
S01011177 <- filter(arransimd, DataZone=="S01011177")
S01011171 <- filter(arransimd, DataZone=="S01011171")
S01011174 <- filter(arransimd, DataZone=="S01011174")
S01011176 <- filter(arransimd, DataZone=="S01011176")
S01011172 <- filter(arransimd, DataZone=="S01011172")
S01011173 <- filter(arransimd, DataZone=="S01011173")
S01011175 <- filter(arransimd, DataZone=="S01011175")
function1(S01011177)

function1(S01011171)

function1(S01011174)

function1(S01011176)

function1(S01011172)

function1(S01011173)

function1(S01011175)

function2(S01011177)
function2(S01011171)
function2(S01011174)
function2(S01011176)
function2(S01011172)
function2(S01011173)
function2(S01011175)
function3(S01011177)
function3(S01011171)
function3(S01011174)
function3(S01011176)
function3(S01011172)
function3(S01011173)
function3(S01011175)
Plot the percentiles as bar charts.
arransimd %>%
ggplot(aes(x=year, y=Percentile)) +
geom_bar(stat="identity") +
facet_wrap('DataZone') +
labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
theme(plot.title = element_text(hjust = 0.5))
Splitting the bar charts up.
Ideally now I’d like to annotate the above data to highlight the 2016 plots, and show where the change in DZ occurs (i.e. draw a polygon around S01011171-S01011177). I don’t know how to do that yet, so what I’ll do now is separate it into 2 plots.
Pre-2016
arransimd2 <- dplyr::filter(arransimd, year < 2016)
arransimd2 %>%
ggplot(aes(x=year, y=Percentile)) +
geom_bar(stat="identity") +
facet_wrap('DataZone') +
labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
theme(plot.title = element_text(hjust = 0.5))
Post-2016
arransimd3 <- dplyr::filter(arransimd, year >= 2016)
arransimd3 %>%
ggplot(aes(x=year, y=Percentile)) +
geom_bar(stat="identity") +
facet_wrap('DataZone') +
labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
theme(plot.title = element_text(hjust = 0.5))
---
title: "Arran"
output:
  html_document: default
  html_notebook: default
---
#Load Packages
First I need to load up the packages I'll need
```{r}
library(sf)
library(ggplot2) #development version!
## devtools::install_github("tidyverse/ggplot2")
library(tidyverse)
library(readr)
## Not sure about this bit
#library("tidyverse",lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")
```

#Import Data
Now I import my data. I filter for the Arran postcodes, (since Arran all begins 'KA27').
```{r}
## Read the postcode coordinate table, then keep only the rows whose
## postcode starts with "KA27" (the Arran prefix).
library(dplyr)
allcoordinates <- read.csv("alldata/ukpostcodes.csv")
arrancoordinates <- allcoordinates %>%
  filter(substr(postcode, 1, 4) == "KA27")
```

#Plot Coordinates
Now I plot these coordinates.
```{r}
## Plotting the Arran coordinates
## Scatter plot of the Arran postcode coordinates (longitude on x, latitude
## on y). The title is set once through labs(); the separate ggtitle() call
## set the same text and was immediately overridden, so it is dropped.
ggplot(data = arrancoordinates) +
  geom_point(mapping = aes(x = longitude, y = latitude)) +
  labs(title = "Arran Postcodes", x = "Longitude", y = "Latitude") +
  # hjust = 0.5 centres the title over the panel
  theme(plot.title = element_text(hjust = 0.5))
```

Now I create some plots.

# Arran Borders
```{r}
# Read the postcode shapefile directory as an sf object.
pcs <- read_sf("alldata/Scotland_pcs_2011")

# Keep the features whose label starts with "KA27", then draw their outlines
# with the x-axis labels rotated 45 degrees.
arransubsect <- pcs %>%
  filter(substr(label, 1, 4) == "KA27")

ggplot(data = arransubsect) +
  geom_sf() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

#Shape files
Then I can load the shape files.
```{r}
# Data-zone boundary shapefiles, one object per SIMD release (oldest first).
DZBoundaries2004 <- read_sf("./alldata/SG_SIMD_2004")
DZBoundaries2006 <- read_sf("./alldata/SG_SIMD_2006")
DZBoundaries2009 <- read_sf("./alldata/SG_SIMD_2009")

# SIMD 2012 dataset page and resource on data.gov.uk:
# https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012
# https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012/resource/d6fa8924-83da-4e80-a560-4ef0477f230b
DZBoundaries2012 <- read_sf("./alldata/SG_SIMD_2012")

# SIMD 2016 comes from http://www.gov.scot/Topics/Statistics/SIMD
# ("new data zone boundaries with SIMD16 ranks (zipped shapefile)",
# listed as '2011 Data Zone boundaries').
DZBoundaries2016 <- read_sf("./alldata/SG_SIMD_2016")
```

#Load SIMD data
Then (having already downloaded it), I can load the SIMD data.
```{r}
# Load the SIMD tables for each release.

#Look at data from 2016
SIMD2016 <- read.csv("./alldata/00505244.csv")
# "./alldata/SG_SIMD_2016" has already been read into DZBoundaries2016 in the
# shape-files chunk, so reuse that object instead of parsing the same
# shapefile a second time.
SIMD20162 <- DZBoundaries2016

#Look at data from 2012
SIMD2012 <- readxl::read_excel("./alldata/SIMD2012/00410770.xls")
SIMD20122 <- readxl::read_excel("./alldata/SIMD2012/00416552.xls")

#Look at data from 2009
SIMD2009 <- readxl::read_excel("./alldata/SIMD2009/0096578.xls")
SIMD20092 <- readxl::read_excel("./alldata/SIMD2009/0097806.xls")

#Look at data from 2006
# The first 2006 workbook is left disabled; the original note marks it as
# 2009 data (it has the same file name as the SIMD2009 workbook above).
# 2009 data - SIMD2006 <- readxl::read_excel("./alldata/SIMD2006/0096578.xls")
SIMD20062 <- readxl::read_excel("./alldata/SIMD2006/0097880.xls")

#Look at data from 2004
SIMD2004 <- readxl::read_excel("./alldata/SIMD2004/0027003.xls")
```

#Select Arran SIMD data
I have to choose the right rows manually in order to select the Arran data.
```{r}
# Pick the Arran data zones out of each boundary file by DataZone code
# instead of by row position. A positional lookup silently returns the wrong
# zones if a shapefile is ever re-exported in a different row order; a lookup
# by code either finds the right zones or fails loudly.

# Return the rows of `boundaries` whose DataZone equals each entry of `codes`,
# in the order of `codes`. Stops with an error naming any code not present.
select_datazones <- function(boundaries, codes) {
  rows <- match(codes, boundaries$DataZone)
  if (anyNA(rows)) {
    stop(
      "Data zone(s) not found: ",
      paste(codes[is.na(rows)], collapse = ", "),
      call. = FALSE
    )
  }
  boundaries[rows, ]
}

#Selecting ArranDZ2016
# Row positions originally used for the 2016 file; kept for reference only.
Arrandz <- c(4672, 4666, 4669, 4671, 4667, 4668, 4670)
# The seven 2016 Arran zone codes (the same codes are filtered on individually
# later in this notebook).
arran_codes_2016 <- c(
  "S01011177", "S01011171", "S01011174", "S01011176",
  "S01011172", "S01011173", "S01011175"
)

#Health domain rank
#2016
arran2016 <- select_datazones(SIMD20162, arran_codes_2016)

#Find postcode look-up, KA27 postcodes. Find unique DZ. Find row positions.

#Selecting ArranDZ2012
# Row positions originally used for the 2012 and earlier files; kept for
# reference only.
Arrandz2012 <- c(4409, 4372, 4353, 4352, 4351, 4350, 4349)
# The seven pre-2016 Arran zone codes, shared by the 2004-2012 releases.
arran_codes_pre2016 <- c(
  "S01004409", "S01004372", "S01004353", "S01004352",
  "S01004351", "S01004350", "S01004349"
)

#2012
arran2012 <- select_datazones(DZBoundaries2012, arran_codes_pre2016)
#2009
arran2009 <- select_datazones(DZBoundaries2009, arran_codes_pre2016)
#2006
arran2006 <- select_datazones(DZBoundaries2006, arran_codes_pre2016)
#2004
arran2004 <- select_datazones(DZBoundaries2004, arran_codes_pre2016)
```

The reason I've downloaded all the datazones shapefiles individually (three steps up), is because they change between 2016 and 2012.

```{r}
# Reduce the 2016 and 2012 Arran zones to the three columns that are plotted,
# tag each with its release year (as text), and stack them into one table.
arran20162 <- mutate(
  select(arran2016, DataZone, geometry, Percentile),
  year = "2016"
)

arran20122 <- mutate(
  select(arran2012, DataZone, geometry, Percentile),
  year = "2012"
)

arran1612 <- rbind(arran20162, arran20122)
```

See the small difference.
```{r}
# One panel per year, each zone filled by its DataZone code. The legend is
# hidden and the x-axis labels are rotated 45 degrees.
ggplot(data = arran1612) +
  geom_sf(mapping = aes(fill = DataZone)) +
  facet_wrap('year') +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
```

Now I want to plot all the data, first I combine it all into one table.
First I subselect the data I want from the appropriate columns.
```{r}
# Same column subset and year tag for the three older releases.
arran20092 <- mutate(
  select(arran2009, DataZone, geometry, Percentile),
  year = "2009"
)

arran20062 <- mutate(
  select(arran2006, DataZone, geometry, Percentile),
  year = "2006"
)

arran20042 <- mutate(
  select(arran2004, DataZone, geometry, Percentile),
  year = "2004"
)

# Stack all five releases into one table, newest first.
arransimd <- rbind(
  arran20162,
  arran20122,
  arran20092,
  arran20062,
  arran20042
)
```

Then I plot the data zones to look at all of them, just to double-check nothing else changed. (It looks like the only change was between 2012 and 2016).
```{r}
# Zone outlines for every release, one panel per year, filled by DataZone
# code. Legend and all axis text/ticks are removed.
ggplot(data = arransimd) +
  geom_sf(mapping = aes(fill = DataZone)) +
  facet_wrap('year') +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
```

#Arran Percentile Plots
Now I plot the percentiles.
```{r}
# Same faceted map, but filled by the Percentile column; the legend is kept
# and all axis text/ticks are removed.
ggplot(data = arransimd) +
  geom_sf(mapping = aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
```

There we are. Not the SIMD health percentiles of Arran zones throughout SIMD history. And I've learned a little bit about graphics in R.

If I wanted to I could show the zones individually..

First I find the unique zones. (There are 14. 7 Zones 2016, 7 Zones pre-2016)

```{r}
# Distinct data-zone codes across all releases.
datazones <- unique(arransimd[["DataZone"]])
```

I'll have to find out a simpler way to do this but..

#Pre-2016 Individual Zones
```{r}
# One table per pre-2016 zone, each named after its DataZone code and holding
# that zone's rows from every release in arransimd.
S01004409 <- arransimd %>% filter(DataZone == "S01004409")
S01004372 <- arransimd %>% filter(DataZone == "S01004372")
S01004353 <- arransimd %>% filter(DataZone == "S01004353")
S01004352 <- arransimd %>% filter(DataZone == "S01004352")
S01004351 <- arransimd %>% filter(DataZone == "S01004351")
S01004350 <- arransimd %>% filter(DataZone == "S01004350")
S01004349 <- arransimd %>% filter(DataZone == "S01004349")
```

```{r}
# Draw one zone on its own, filled by Percentile, with one panel per year.
#
# argument: sf data frame with `Percentile`, `year` and a geometry column
#           (e.g. a single zone filtered out of `arransimd`).
# Returns the ggplot object; printing it draws the map. Only the x-axis text
# and ticks are blanked, so the y axis is still shown.
function1 <- function(argument) {
  zone_map <- ggplot(data = argument) +
    geom_sf(mapping = aes(fill = Percentile)) +
    facet_wrap('year')

  zone_map +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
}
```

```{r}
# Draw the per-year percentile map for each pre-2016 zone, in the same order
# as before. print() is needed because a plot returned inside a loop is not
# auto-printed.
for (zone in list(S01004409, S01004372, S01004353, S01004352,
                  S01004351, S01004350, S01004349)) {
  print(function1(zone))
}
```

This is all well and good. But I found it difficult to remember which zone went where. So I've plotted a reference image to go beside the charts.

# Pre-2016 Reference Images
```{r}
# Reference map: highlight one zone on top of the island outline.
#
# argument: sf data frame with a `DataZone` column and geometry; drawn as the
#           top layer, filled by DataZone.
# island:   sf object drawn underneath as the base layer. Defaults to the
#           global `arransubsect`, which is what this function previously read
#           implicitly, so existing one-argument calls behave as before. The
#           default is evaluated lazily, when the function is called.
# Returns the ggplot object; printing it draws the map.
function2 <- function(argument, island = arransubsect) {
  ggplot(data = island) +
    geom_sf() +
    geom_sf(data = argument, mapping = aes(fill = DataZone)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
```

```{r}
# Rebuild the island outline (features whose label starts with "KA27"), then
# draw the reference map for each pre-2016 zone in turn. print() is needed
# because a plot returned inside a loop is not auto-printed.
arransubsect <- pcs %>%
  filter(substr(label, 1, 4) == "KA27")

for (zone in list(S01004409, S01004372, S01004353, S01004352,
                  S01004351, S01004350, S01004349)) {
  print(function2(zone))
}
```

But thinking about it, I could plot all the years together like this.

# Pre-2016 Individual Zones shown on whole island
```{r}
# Draw one zone, filled by Percentile, on top of the island outline, with one
# panel per year.
#
# argument: sf data frame with `Percentile`, `year` and geometry; it is the
#           plot's default data, so it drives both the fill and the facets.
# island:   sf object drawn underneath as the base layer. Defaults to the
#           global `arransubsect`, which is what this function previously read
#           implicitly, so existing one-argument calls behave as before. The
#           default is evaluated lazily, when the function is called.
# Returns the ggplot object; printing it draws the map. Only the x-axis text
# and ticks are blanked.
function3 <- function(argument, island = arransubsect) {
  ggplot(data = argument) +
    geom_sf(data = island) +
    geom_sf(mapping = aes(fill = Percentile)) +
    facet_wrap('year') +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
}
```

```{r}
# Draw the zone-on-island percentile map for each pre-2016 zone. print() is
# needed because a plot returned inside a loop is not auto-printed.
for (zone in list(S01004409, S01004372, S01004353, S01004352,
                  S01004351, S01004350, S01004349)) {
  print(function3(zone))
}
```

#2016
Now I apply the same functions to the post-2016 Zones
```{r}
# Island outline: postcode features whose label starts with "KA27".
arransubsect <- pcs %>%
  filter(substr(label, 1, 4) == "KA27")

# One table per 2016 zone, each named after its DataZone code.
S01011177 <- arransimd %>% filter(DataZone == "S01011177")
S01011171 <- arransimd %>% filter(DataZone == "S01011171")
S01011174 <- arransimd %>% filter(DataZone == "S01011174")
S01011176 <- arransimd %>% filter(DataZone == "S01011176")
S01011172 <- arransimd %>% filter(DataZone == "S01011172")
S01011173 <- arransimd %>% filter(DataZone == "S01011173")
S01011175 <- arransimd %>% filter(DataZone == "S01011175")
```

```{r}
# Per-year percentile map for each 2016 zone. print() is needed because a
# plot returned inside a loop is not auto-printed.
for (zone in list(S01011177, S01011171, S01011174, S01011176,
                  S01011172, S01011173, S01011175)) {
  print(function1(zone))
}
```
```{r}
# Reference map (zone highlighted on the island) for each 2016 zone. print()
# is needed because a plot returned inside a loop is not auto-printed.
for (zone in list(S01011177, S01011171, S01011174, S01011176,
                  S01011172, S01011173, S01011175)) {
  print(function2(zone))
}
```

```{r}
# Zone-on-island percentile map for each 2016 zone. print() is needed because
# a plot returned inside a loop is not auto-printed.
for (zone in list(S01011177, S01011171, S01011174, S01011176,
                  S01011172, S01011173, S01011175)) {
  print(function3(zone))
}
```

#Plot the percentiles as bar charts.
```{r}
# Bar chart of Percentile by year, one panel per data zone, with a centred
# title.
ggplot(data = arransimd, mapping = aes(x = year, y = Percentile)) +
  geom_bar(stat = "identity") +
  facet_wrap('DataZone') +
  labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
  theme(plot.title = element_text(hjust = 0.5))
```

#Splitting the bar charts up.
Ideally now I'd like to annotate the above data to highlight the 2016 plots, and show where the change in DZ occurs (i.e. draw a polygon around S01011171-S01011177).
I don't know how to do that yet, so what I'll do now is separate it into 2 plots.

Pre-2016
```{r}
# Keep only the releases before 2016.
# `year` is stored as text ("2004" ... "2016"). Comparing it directly with the
# number 2016 makes R coerce 2016 to "2016" and compare strings, so convert
# the column to integer and compare numerically instead.
arransimd2 <- dplyr::filter(arransimd, as.integer(year) < 2016L)

# Bar chart of Percentile by year, one panel per pre-2016 data zone.
ggplot(data = arransimd2, mapping = aes(x = year, y = Percentile)) +
  geom_bar(stat = "identity") +
  facet_wrap('DataZone') +
  labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
  theme(plot.title = element_text(hjust = 0.5))
```

Post-2016
```{r}
# Keep only the releases from 2016 onwards.
# `year` is stored as text ("2004" ... "2016"). Comparing it directly with the
# number 2016 makes R coerce 2016 to "2016" and compare strings, so convert
# the column to integer and compare numerically instead.
arransimd3 <- dplyr::filter(arransimd, as.integer(year) >= 2016L)

# Bar chart of Percentile by year, one panel per 2016 data zone.
ggplot(data = arransimd3, mapping = aes(x = year, y = Percentile)) +
  geom_bar(stat = "identity") +
  facet_wrap('DataZone') +
  labs(title = "Arran SIMD Datazones", x = "Year", y = "Percentile") +
  theme(plot.title = element_text(hjust = 0.5))
```